library(redist)
library(tidyverse)
library(maptools)
library(doMC)
library(igraph)

## -------------------------
## Format map down to subset
## -------------------------
## Load map
fl_map <- readShapePoly("../data/fl/FL.shp")

## Laod nodes
nodes <- strsplit(readLines("../data/largemap_enum_all/map_sub_nodes_89.dat"), " ")[[1]]

## First, subset down to right map
fl_sub <- fl_map[which(rownames(fl_map@data) %in% nodes),]

## Then, reorder nodes to be correct order to match - otherwise, won't
## line up properly with original map
fl_sub <- fl_sub[match(nodes, rownames(fl_sub@data)),]

## Convert factor columns to characters, because this particular data is messy
indx <- sapply(fl_sub@data, is.factor)
fl_sub@data[indx] <- lapply(
    fl_sub@data[indx], function(x) as.numeric(as.character(x))
)

## --------------------
## Run enumpart for 50k
## --------------------
system("../enumpart_private/enumpart ../data/largemap_enum_all/map_sub_ordered_89.dat -k 2 -sample 50000 > ../data/largemap_enum_all/solutions_test_sample.dat")

## Load solutions
sols <- readLines("../data/largemap_enum_all/solutions_test_sample.dat")

## Create edgelist
el <- strsplit(readLines("../data/largemap_enum_all/map_sub_ordered_89.dat"), split = " ")
el <- apply(do.call(rbind, el), 2, as.numeric)

## Convert solutions list into congressional district assignments
## Random sampling code first
ndists <- 2
sols_out <- mclapply(1:length(sols), function(x){
    sol_split <- as.numeric(strsplit(sols[x], split = " ")[[1]])
    el_sub <- el[sol_split,]
    comps_out <- components(graph_from_edgelist(el_sub, directed = FALSE))
    if(comps_out$no < ndists){
        max_num <- max(comps_out$membership)
        comps_out <- c(comps_out$membership, (max_num + 1):ndists)
    }else{
        comps_out <- comps_out$membership
    }
    return(comps_out)
}, mc.cores = detectCores())
sols_out <- do.call(cbind, sols_out)

## Summary statistics for both
seg_truth <- unlist(mclapply(1:ncol(sols_out), function(x){
    T <- sum(fl_sub@data$pop)
    P <- sum(fl_sub@data$mccain) / T
    t_i <- tapply(fl_sub@data$pop, sols_out[,x], sum)
    p_i <- tapply(fl_sub@data$mccain, sols_out[,x], sum) / t_i
    return(sum(t_i * abs(p_i - P) / (2 * T * P * (1 - P)), na.rm = TRUE))
}, mc.cores = detectCores()))

## -------------------------------------
## Load the true seg index and benchmark
## -------------------------------------
seg_allsols <- read_csv("../data/largemap_enum_all/segregation_89.csv")

ks.test(seg_allsols$seg, seg_truth)
plot(density(seg_allsols$seg))
lines(density(seg_truth), col = "red")
